{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1.将上方数据表建立 DataFrame 进行存储"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = {'id':[1001,1001,1002,1003,1001,1002,1002,1003,1001,1002,1001,1002],\n",
    "       'date':['2019-01-01','2019-01-01','2019-01-01','2019-01-01','2019-01-02','2019-01-02','2019-01-02','2019-01-02','2019-01-03','2019-01-03','2019-01-04','2019-01-04'],\n",
    "       'goods':['A','B','A','B','A','A','B','A','A','B','A','A'],\n",
    "       'sales':[3000,2000,3000,2000,3000,3000,2000,3000,3000,2000,3000,3000]}\n",
    "df = pd.DataFrame(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>date</th>\n",
       "      <th>goods</th>\n",
       "      <th>sales</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1001</td>\n",
       "      <td>2019-01-01</td>\n",
       "      <td>A</td>\n",
       "      <td>3000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1001</td>\n",
       "      <td>2019-01-01</td>\n",
       "      <td>B</td>\n",
       "      <td>2000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1002</td>\n",
       "      <td>2019-01-01</td>\n",
       "      <td>A</td>\n",
       "      <td>3000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1003</td>\n",
       "      <td>2019-01-01</td>\n",
       "      <td>B</td>\n",
       "      <td>2000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1001</td>\n",
       "      <td>2019-01-02</td>\n",
       "      <td>A</td>\n",
       "      <td>3000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1002</td>\n",
       "      <td>2019-01-02</td>\n",
       "      <td>A</td>\n",
       "      <td>3000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1002</td>\n",
       "      <td>2019-01-02</td>\n",
       "      <td>B</td>\n",
       "      <td>2000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>1003</td>\n",
       "      <td>2019-01-02</td>\n",
       "      <td>A</td>\n",
       "      <td>3000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>1001</td>\n",
       "      <td>2019-01-03</td>\n",
       "      <td>A</td>\n",
       "      <td>3000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>1002</td>\n",
       "      <td>2019-01-03</td>\n",
       "      <td>B</td>\n",
       "      <td>2000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>1001</td>\n",
       "      <td>2019-01-04</td>\n",
       "      <td>A</td>\n",
       "      <td>3000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>1002</td>\n",
       "      <td>2019-01-04</td>\n",
       "      <td>A</td>\n",
       "      <td>3000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      id        date goods  sales\n",
       "0   1001  2019-01-01     A   3000\n",
       "1   1001  2019-01-01     B   2000\n",
       "2   1002  2019-01-01     A   3000\n",
       "3   1003  2019-01-01     B   2000\n",
       "4   1001  2019-01-02     A   3000\n",
       "5   1002  2019-01-02     A   3000\n",
       "6   1002  2019-01-02     B   2000\n",
       "7   1003  2019-01-02     A   3000\n",
       "8   1001  2019-01-03     A   3000\n",
       "9   1002  2019-01-03     B   2000\n",
       "10  1001  2019-01-04     A   3000\n",
       "11  1002  2019-01-04     A   3000"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.利用 pivot_table 统计该企业每个人每天销售商品 A 和 B 的金额之和"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"4\" halign=\"left\">sales</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>date</th>\n",
       "      <th>2019-01-01</th>\n",
       "      <th>2019-01-02</th>\n",
       "      <th>2019-01-03</th>\n",
       "      <th>2019-01-04</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1001</th>\n",
       "      <td>5000</td>\n",
       "      <td>3000</td>\n",
       "      <td>3000</td>\n",
       "      <td>3000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1002</th>\n",
       "      <td>3000</td>\n",
       "      <td>5000</td>\n",
       "      <td>2000</td>\n",
       "      <td>3000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1003</th>\n",
       "      <td>2000</td>\n",
       "      <td>3000</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          sales                                 \n",
       "date 2019-01-01 2019-01-02 2019-01-03 2019-01-04\n",
       "id                                              \n",
       "1001       5000       3000       3000       3000\n",
       "1002       3000       5000       2000       3000\n",
       "1003       2000       3000          0          0"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.pivot_table(df,index=['id'],columns=['date'],aggfunc=np.sum,fill_value=0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.请将上面的统计数据再增加一列，统计出每个人的销售总金额"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"5\" halign=\"left\">sales</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>date</th>\n",
       "      <th>2019-01-01</th>\n",
       "      <th>2019-01-02</th>\n",
       "      <th>2019-01-03</th>\n",
       "      <th>2019-01-04</th>\n",
       "      <th>All</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1001</th>\n",
       "      <td>5000</td>\n",
       "      <td>3000</td>\n",
       "      <td>3000</td>\n",
       "      <td>3000</td>\n",
       "      <td>14000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1002</th>\n",
       "      <td>3000</td>\n",
       "      <td>5000</td>\n",
       "      <td>2000</td>\n",
       "      <td>3000</td>\n",
       "      <td>13000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1003</th>\n",
       "      <td>2000</td>\n",
       "      <td>3000</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>All</th>\n",
       "      <td>10000</td>\n",
       "      <td>11000</td>\n",
       "      <td>5000</td>\n",
       "      <td>6000</td>\n",
       "      <td>32000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          sales                                        \n",
       "date 2019-01-01 2019-01-02 2019-01-03 2019-01-04    All\n",
       "id                                                     \n",
       "1001       5000       3000       3000       3000  14000\n",
       "1002       3000       5000       2000       3000  13000\n",
       "1003       2000       3000          0          0   5000\n",
       "All       10000      11000       5000       6000  32000"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.pivot_table(df,index=['id'],columns=['date'],aggfunc=np.sum,margins=True,fill_value=0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4.请将以上数据存入 DataFrame 中"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "data1 = {'sex':['man','man','women','women','man','women','man','women','women'],\n",
    "        'age':[15,23,25,17,35,57,24,31,22],\n",
    "        'smoke':[True,False,False,True,True,False,False,True,False],\n",
    "        'height':[168,179,181,166,173,178,188,190,160]}\n",
    "df1 = pd.DataFrame(data1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sex</th>\n",
       "      <th>age</th>\n",
       "      <th>smoke</th>\n",
       "      <th>height</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>man</td>\n",
       "      <td>15</td>\n",
       "      <td>True</td>\n",
       "      <td>168</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>man</td>\n",
       "      <td>23</td>\n",
       "      <td>False</td>\n",
       "      <td>179</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>women</td>\n",
       "      <td>25</td>\n",
       "      <td>False</td>\n",
       "      <td>181</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>women</td>\n",
       "      <td>17</td>\n",
       "      <td>True</td>\n",
       "      <td>166</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>man</td>\n",
       "      <td>35</td>\n",
       "      <td>True</td>\n",
       "      <td>173</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>women</td>\n",
       "      <td>57</td>\n",
       "      <td>False</td>\n",
       "      <td>178</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>man</td>\n",
       "      <td>24</td>\n",
       "      <td>False</td>\n",
       "      <td>188</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>women</td>\n",
       "      <td>31</td>\n",
       "      <td>True</td>\n",
       "      <td>190</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>women</td>\n",
       "      <td>22</td>\n",
       "      <td>False</td>\n",
       "      <td>160</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     sex  age  smoke  height\n",
       "0    man   15   True     168\n",
       "1    man   23  False     179\n",
       "2  women   25  False     181\n",
       "3  women   17   True     166\n",
       "4    man   35   True     173\n",
       "5  women   57  False     178\n",
       "6    man   24  False     188\n",
       "7  women   31   True     190\n",
       "8  women   22  False     160"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5.请以性别为分组依据，查看男女抽烟平均年龄和身高"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>height</th>\n",
       "      <th>smoke</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sex</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>man</th>\n",
       "      <td>24.25</td>\n",
       "      <td>177</td>\n",
       "      <td>0.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>women</th>\n",
       "      <td>30.40</td>\n",
       "      <td>175</td>\n",
       "      <td>0.4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         age  height  smoke\n",
       "sex                        \n",
       "man    24.25     177    0.5\n",
       "women  30.40     175    0.4"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.pivot_table(df1,index=['sex'],aggfunc=np.mean)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6.利用交叉表知识，统计各个性别抽烟的人数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>smoke</th>\n",
       "      <th>False</th>\n",
       "      <th>True</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sex</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>man</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>women</th>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "smoke  False  True \n",
       "sex                \n",
       "man        2      2\n",
       "women      3      2"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.crosstab(df1['sex'],df1['smoke'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7.利用交叉表知识，统计各个年龄段抽烟人情况"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>smoke</th>\n",
       "      <th>False</th>\n",
       "      <th>True</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>age</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>57</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "smoke  False  True \n",
       "age                \n",
       "15         0      1\n",
       "17         0      1\n",
       "22         1      0\n",
       "23         1      0\n",
       "24         1      0\n",
       "25         1      0\n",
       "31         0      1\n",
       "35         0      1\n",
       "57         1      0"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.crosstab(df1['age'],df1['smoke'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
